{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature selection\n",
    "\n",
    "Univariate feature selection on synthetic regression data: F-test p-values first, then a variance threshold."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "\n",
    "# Synthetic regression problem: 1000 samples, 10000 features.\n",
    "# random_state is fixed so that Restart & Run All regenerates the same data.\n",
    "X, y = datasets.make_regression(n_samples=1000, n_features=10000, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import feature_selection\n",
    "\n",
    "# Univariate F-test of every column of X against y;\n",
    "# f holds the F-statistics and p the matching p-values.\n",
    "f, p = feature_selection.f_regression(X=X, y=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.71769054,  0.33235151,  0.20297762,  2.48072786,  0.82929345])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "f[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.39710561,  0.5644077 ,  0.65242591,  0.11556583,  0.36269681])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "530"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Keep the column indices whose F-test p-value falls below 0.05.\n",
    "# NOTE(review): no multiple-testing correction is applied across the\n",
    "# X.shape[1] features - confirm that this is intended.\n",
    "idx = np.arange(X.shape[1])\n",
    "features_to_keep = idx[p < 0.05]\n",
    "features_to_keep.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# VarianceThreshold compares each feature's (column's) variance with the\n",
    "# threshold, so the cutoff has to be the median of the per-feature variances,\n",
    "# i.e. np.var(X, axis=0). axis=1 would give per-sample variances instead.\n",
    "var_threshold = feature_selection.VarianceThreshold(np.median(np.var(X, axis=0)))\n",
    "var_threshold.fit_transform(X).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Smaller problem for plotting: 10000 samples, 20 features.\n",
    "# NOTE: this rebinds X, y, f and p from the earlier cells.\n",
    "# random_state is fixed so that Restart & Run All regenerates the same data.\n",
    "X, y = datasets.make_regression(n_samples=10000, n_features=20, random_state=0)\n",
    "f, p = feature_selection.f_regression(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.text.Text at 0x98e8fd0>"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa8AAAFCCAYAAABGsVJdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFLJJREFUeJzt3X+wZGdd5/H3Z5IKFEIQiEyTicmuolLGhUBJRGHXi6kl\nk6zrUNSWlaRWJdauY2mEqi3XgL8yAXcX/1GxEDUQs4LGuPLDxCWYWTVTigYSa5OAZpIJQsLMJDMB\nEhDC1u6YfPePPpP03Ll3bt97z719nun3q+rU7T799Dnf/nH6c59zntOdqkKSpJZsmXUBkiStluEl\nSWqO4SVJao7hJUlqjuElSWqO4SVJao7hJZ3kklyV5P2zrkPqk+Gl5iR5IMnXkvxjkq90f0frXOb3\nJtnfV40D5AmdOqmcOusCpDUo4N9U1a09LjOs4wM+ySlV9USP9Ug6AXtealWWnJm8KslfJ3ksyZ1J\nvnfitjcmuafrqX06yY91858F3AycOdmTS3JdkrdN3P+Y3lmSzyb5mSR3A19NsiXJi5J8IMkjSf4h\nyU8t+wDGy//NJLu7dd6a5Oxl2t6c5CcWzbsryeu7y7+W5HNJvpzkjiSvWWY5x/Uwu8fxfd3lJHlL\n9/x8PskNSb6+u+0ZSd6f5Avd8/uJJN+w3OOTNpLhpZNGkjOB/wm8raqeB/w08MEkL+iaHAYurqrT\ngcuBX01yXlV9DbgIeKiqnlNVp1fVoWVWs7h3dkl336/vbvsT4E7gRcAFwJuT/OsTlH0ZcDXwAuBu\n4PeXafcHXdujj/XbgbOBj3SzbgdeCjwPuB74oySnTfkYJr0J+AHgXwJnAo8B7+5u+xHgdGAb8Hzg\nx4H/c4JlSRvG8FKr/jjJo930oW7evwc+UlW3AFTVnwN/C1zcXf9oVT3QXf4rYDfjD+n1eGdVPVRV\n/xd4JXBGVf2XqnqiW9d7GQfccj5SVX9dVUeAnwO+O8m2Jdp9GHhZkm/srl8GfKi7H1V1fVV9qaqe\nrKpfBZ4BfNsaHs9O4Oeq6uFu2W8D/l2SLcARxiH7rTV2Z1V9dQ3rkNbN8FKrdlTV87vpDd28c4Af\nnAi1x4BXM+4FkeSiJLcl+WJ320XAGeus48DE5XOAbYvW/1bghSe4/1O78KrqceBRxj2eY3QhcTNP\nB+GlTPTSkvx0t0v0sW69p7O2x3YO8OGjjwG4h3FobQXeD9wC3JDkQJJ3JDllDeuQ1s0BG2rVUse8\n9gPvq6qdxzUe70L7AOPe2Y1V9WSSD08sZ6ldaY8Dz5q4/qIl2kzebz/wmapaTY/naE+KJM9mvDvu\noWXa/gFwVZK/Ap5xdMBKd3zrPwOvrap7unmPsvRzdMxj6sJn8rjV54Afrarblqnh7cDbu2NzHwXu\nA65b6UFKfbPnpZPJ7wH/NsnrusETz+wGKJwJnNZNX+iC6yLgdRP3PQy8IMnpE/PuAi5O8rxuKP6b\nV1j/7cBXukEcz0xySpJzk3znCe5zcZLv6cL17cBtVXVwmbY3M+4ZvQ34w4n5z2HcO/piktOS/GI3\nbyn7gGd2vdBTgZ9n/Lwc9dvAfz06cCTJNyT5ge7yQpLv6HYhfrVb55MneGzShjG81KIlBxxU1QFg\nB/CzwOeBBxkP2tjS7XZ7E+OBDI8y3v1248R972Pcs/lMt8tsxHg32SeBB4A/BW44UR1V9STw/cB5\nwGeBR4D3MN6Ft5zrgV3AF4GXM+4ZLv2gq/4f8CHGA0Gun7jplm7a1633a0zsjly0jH8EfgK4lvEu\nz69w7K7PdzJ+XnYn+TLwN8D53W0jxr3XLwN/D9zK+DmSNl1W+jHKJNcy3iAPV9VLl2nz64yPHzwO\nvLGq7uq7UOlkk+Q6YH9V/eKsa5FaM03P6zrgwuVu7Ha/fHNVfQvjkUq/1VNtkiQtacXwqqqPMT7X\nYzk7gPd1bT8BPDfJ1n7Kk05qfmWTtE
Z9jDbcxrH71w928w73sGzppFVVPzrrGqRWOWBDktScPnpe\nB5k4VwU4q5t3nCTuJpEkHaeqlvy+0uVM2/MKy3wRKnAT8MMw/lJU4EtVtewuw6pymvF01VVXzbwG\nJ1+HIUy+BsOY1mLFnleS64EFxidwfg64ivFJjVVV11TVzUkuTvJpxkPlL19TJZIkTWnF8Kqqy6Zo\nc0U/5UiStDIHbMyhhYWFWZcgfB2GwNegXSt+w0avK0tqM9cnSRq+JNQGDdiQJGkwDC9JUnMML0ma\nA6PRiCRrmkaj0azLP47HvCRpDiSrOqR0nI387PaYlyRpLhhekqTmGF6SpOYYXpKk5hhekqTmGF6S\npOYYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTm\nGF6SpOYYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTmGF6SpOYYXpKk5hhe\nkqTmGF6SpOYYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTmGF4bZDQakWRN02g0mnX5kjRoqarNW1lS\nm7m+WUqyrvvPy/Okk99oNOLw4cNrvv/WrVs5dOhQjxXNpyF/JiWhqlZVoOG1QYb8RpE203q3BXB7\n6MOQP5PWEl7uNpQkNcfwkiQ1x/CSJDVnqvBKsj3JvUn2JblyidtPT3JTkruSfCrJG3uvVJKkzooD\nNpJsAfYBFwAPAXcAl1TVvRNt3gqcXlVvTXIGcB+wtar+adGyHLAxpXl5nnTyc8DGMAz5M2mjBmyc\nD9xfVQ9W1RHgBmDHojYFPKe7/Bzgi4uDS5KkvkwTXtuA/RPXD3TzJr0L+PYkDwF3A2/upzxJko7X\n14CNC4E7q+pM4OXAbyR5dk/LliTpGKdO0eYgcPbE9bO6eZMuB/4bQFX9Q5LPAi8B/nbxwnbt2vXU\n5YWFBRYWFlZVsCSpbXv27GHPnj3rWsY0AzZOYTwA4wLgYeB24NKq2jvR5jeAR6rq6iRbGYfWy6rq\n0UXLcsDGlObledLJzwEbwzDkz6S1DNhYsedVVU8kuQLYzXg347VVtTfJzvHNdQ3wS8B/T/LJ7m4/\nszi4JEnqi99tuEGG/F+OtJnseQ3DkD+T/G5DSdJcMLwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8\nJEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJ\nqzIajUiy5mk0Gq27hmzmz2snqXn5Oe8h/+S2tJnWuy2A20Mf+vxM6vs1TUJVrWqh9rwkSc0xvCRJ\nzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0x\nvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwk\nSc0xvCRJzTG8JEnNMbwkSc2ZKrySbE9yb5J9Sa5cps1CkjuT/F2SW/stU5Kkp6WqTtwg2QLsAy4A\nHgLuAC6pqnsn2jwX+BvgdVV1MMkZVfWFJZZVK63vZJFkXfefl+dJJ7/1bgvg9tCHPj+T+n5Nk1BV\nq1roND2v84H7q+rBqjoC3ADsWNTmMuCDVXWwK+q44JIkqS/ThNc2YP/E9QPdvEnfCjw/ya1J7kjy\nQ30VKEnSYqf2uJxXAN8HfB1wW5LbqurTixvu2rXrqcsLCwssLCz0VIIkqRWTWbAW0xzzehWwq6q2\nd9ffAlRV/fJEmyuBZ1bV1d319wIfraoPLlqWx7ymNC/Pk05+HvMahnk85nUH8OIk5yQ5DbgEuGlR\nmxuB1yQ5JcmzgO8C9q6mEEmSprXibsOqeiLJFcBuxmF3bVXtTbJzfHNdU1X3JrkF+CTwBHBNVd2z\noZ
VLkubWirsNe12Zuw2nNi/Pk05+7jYchnncbShJ0qAYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTm\nGF6SpOYYXpKk5hhekqTmGF6SpOYYXpKk5hhekqTmGF6SpOYYXtJJYDQakWTN02g0mvVDkFbFn0TZ\nIP4kijbTkH92ZMi1zRN/EkWSpBkzvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8\nJEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJ\nzTG8pBkZjUYkWdM0Go1mXb40U6mqzVtZUpu5vllKsq77z8vzNM/6fI+sd1mLl9enIdc2T4b8fktC\nVa1qofa8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNMbwkSc0xvCRJzTG8JEnNmSq8\nkmxPcm+SfUmuPEG7VyY5kuQN/ZUoSdKxVgyvJFuAdwEXAucClyZ5yTLt3gHc0neRkiRNmqbndT5w\nf1U9WFVHgBuAHUu0+yngA8AjPdYnSdJxpgmvbcD+iesHunlPSXIm8Pqq+k1g/V83LEnSCZza03J+\nDZg8FrZsgO3ateupywsLCywsLPRUgiSpFZNZsBYr/p5XklcBu6pqe3f9LUBV1S9PtPnM0YvAGcDj\nwI9V1U2LluXveU1pXp6neTbk31fq05BrmydDfr+t5fe8pul53QG8OMk5wMPAJcCli4r4pokirgP+\nZHFwSZLUlxXDq6qeSHIFsJvxMbJrq2pvkp3jm+uaxXfZgDolSXrKirsNe11Zz7sNR6MRhw8fXtN9\nt27dyqFDh3qrZTF3G2olQ96N06ch1zZPhvx+W8tuw6bDa8gBMeTaNAxD/jDp05BrmydDfr+tJbz8\neihJUnMML0lScwwvSVJzDC9JUnMML0lScwwvSVJzDC9JUnMML0lScwwvSVJzDC9JUnMML0lScwwv\nSVJzDC9JUnMML0lScwwvSVJzDC9JUnMML0nqyWg0IsmaptFoNOvym+IvKW+QIdemYRjyL9v2aci1\n9W3I2/2Q32/+krIkaS4YXpKk5hhekqTmGF6SpOYYXpKk5hheWpf1DA12eLCktXKo/AYZcm19mqdh\n0H0b8tDlPg25tr4Nebsf8vvNofKSpLlgeEmSmmN4SZKaY3hJmlsOOGqXAzY2yJBr69M8HYzv25AP\noPdpnmob8nY/5PebAzYkSXPB8JIkNcfwkiQ1x/CSJDXH8JIkNcfwkiQ1x/CSJDVn08PLEwIlSevV\nVM/r8OHDsy5BkjQATYWXJElgeEmSGmR4SZKaY3hJkppjeEmSmmN4SZKaM1V4Jdme5N4k+5JcucTt\nlyW5u5s+luRf9F+qJEljK4ZXki3Au4ALgXOBS5O8ZFGzzwD/qqpeBvwS8J6+C5Uk6ahpel7nA/dX\n1YNVdQS4Adgx2aCqPl5VX+6ufhzY1m+ZkiQ9bZrw2gbsn7h+gBOH038APrqeoiRJOpFT+1xYktcC\nlwOv6XO5kiRNmia8DgJnT1w/q5t3jCQvBa4BtlfVY/2UJ0k6Ge3atWtd909VnbhBcgpwH3AB8DBw\nO3BpVe2daHM28OfAD1XVx0+wrBOvbAqT9SbpbVl9G3JtfVrv44R2Hmvf+nyPDPl1mKfahrzdD/n9\nloSqWtVCV+x5VdUTSa4AdjM+RnZtVe1NsnN8c10D/ALwfODdGT+qI1V1/moKkSRpWiv2vHpdmT2v\nqbXSGxnyf9VDN+T/hPs0T7UNebsf8vttLT0vv2FDktQcw0uS1BzDS5LUHMNLktQcw0uS1BzDS5LU\nHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS1BzD\nS5LUHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS1BzDS5LUHMNLktQc
w0uS1BzDS5LUHMNLktQcw0uS\n1BzDS5LUHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS1BzDS5LUHMNLktQc\nw0uS1BzDS5LUHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS1JypwivJ9iT3JtmX5Mpl2vx6kvuT3JXk\nvH7LlCTpaSuGV5ItwLuAC4FzgUuTvGRRm4uAb66qbwF2Ar+1AbWqJ3v27Jl1CZK0LtP0vM4H7q+q\nB6vqCHADsGNRmx3A+wCq6hPAc5Ns7bVS9cbwktS6acJrG7B/4vqBbt6J2hxcoo0kSb1wwIYkqTmn\nTtHmIHD2xPWzunmL23zjCm16kWSQy+rbRtd29dVXb+jyV2PIr8OQ9f28Dfl1mJfa5uVx9rG8acLr\nDuDFSc4BHgYuAS5d1OYm4CeBP0zyKuBLVXV48YKqarivjCSpGSuGV1U9keQKYDfj3YzXVtXeJDvH\nN9c1VXVzkouTfBp4HLh8Y8uWJM2zVNWsa5AkaVU2bcDGNCc6a2MleSDJ3UnuTHL7rOuZF0muTXI4\nyScn5j0vye4k9yW5JclzZ1njPFjmdbgqyYEk/7ubts+yxpNdkrOS/EWSv0/yqSRv6uavenvYlPCa\n5kRnbYongYWqenlVnT/rYubIdYzf+5PeAvxZVX0b8BfAWze9qvmz1OsA8CtV9Ypu+tPNLmrO/BPw\nn6rqXOC7gZ/ssmDV28Nm9bymOdFZGy94esSmq6qPAY8tmr0D+N3u8u8Cr9/UoubQMq8DjLcLbYKq\nOlRVd3WXvwrsZTw6fdXbw2Z9kE1zorM2XgH/K8kdSf7jrIuZcy88OiK3qg4BL5xxPfPsiu47Wd/r\n7tvNk+SfAecBHwe2rnZ78L/w+fLqqnoFcDHj7vprZl2QnuLIqdl4N/BNVXUecAj4lRnXMxeSPBv4\nAPDmrge2+P2/4vawWeE1zYnO2mBV9XD39/PAhxnvztVsHD76/Z9JRsAjM65nLlXV5+vpIdfvAV45\ny3rmQZJTGQfX+6vqxm72qreHzQqvp050TnIa4xOdb9qkdQtI8qzuvx2SfB3wOuDvZlvVXAnHHlu5\nCXhjd/lHgBsX30Eb4pjXofugPOoNuE1sht8B7qmqd07MW/X2sGnneXVDUN/J0yc6v2NTViwAkvxz\nxr2tYnxy+u/7GmyOJNcDC8ALgMPAVcAfA3/E+GvVHgR+sKq+NKsa58Eyr8NrGR93eRJ4ANi51LcD\nqR9JXg38JfApxp9FBfwscDvwP1jF9uBJypKk5jhgQ5LUHMNLktQcw0uS1BzDS5LUHMNLktQcw0uS\n1BzDS5LUHMNLktSc/w8wgwgQfs+JMQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7cf5a90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "# Bar chart of the F-test p-value of each feature.\n",
    "# The figure handle is named `fig` so it does not overwrite `f`,\n",
    "# the F-statistics returned by f_regression.\n",
    "fig, ax = plt.subplots(figsize=(7, 5))\n",
    "# One bar per entry of p rather than a hard-coded feature count.\n",
    "ax.bar(np.arange(len(p)), p, color='k')\n",
    "ax.set_title(\"Feature p values\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
